#***********************************************************************#
#Replication code for 
#"Measuring the Risk of Corruption in Latin American Political Parties. 
#De jure Analysis of Institutions"
#***********************************************************************#

#Required Packages
install.packages("Hmisc")
install.packages("dplyr")  
install.packages("ggplot2")  
install.packages("corrplot")  
install.packages("reshape2")
install.packages("qgraph")
install.packages("ggcorrplot")
install.packages("pastecs") 
install.packages("EFAtools")
install.packages("stats")
install.packages("FactoMineR")
install.packages("factoextra")
install.packages("collapse")
install.packages("tibble")
install.packages("xtable")
install.packages("tidytext")
install.packages("base")
install.packages("expss")
install.packages("openxlsx")
install.packages("forcats")
install.packages("ggpubr")

library(ggplot2)
library(corrplot)
library(reshape2)
library(qgraph)
library(ggcorrplot)
library(EFAtools)
library(stats)
library(FactoMineR)
library(factoextra)
library(pastecs)
library(collapse)
library(tibble)
library(Hmisc)
library(xtable)
library(tidytext)
library(dplyr) 
library(base)
library(expss)
library(ggrepel)
library(forcats)




##########################################################################################
#****************************************************************************************#
#***************************POLITICAL PARTY LEVEL ANALYSIS*******************************#
#****************************************************************************************#
##########################################################################################


###############################
###1. Calculating the Index ###
###############################
#1.1 Data Input: Political Parties

dta_pp <- read.csv("C:/Users/giova/OneDrive/Research_Projects/Risk_Corruption/R_script/roc_parties.csv", header=T,stringsAsFactors=FALSE,sep=",")
dta_pp$r_commit <- as.numeric(dta_pp$r_commit)
dta_pp$national_pp <- as.factor(dta_pp$national_pp)
sapply(dta_pp, class)
head(dta_pp,4)
sapply(dta_pp, class)

#1.2 Descriptive statistics

#***********************************#
#Function for descriptive statistics#
#***********************************#

des_stat <- function(x) {
  c(mean=mean(x,na.rm=T), sd=sd(x,na.rm=T), max= max(x,na.rm=T), min=min(x,na.rm=T),var=var(x,na.rm=T),median=median(x,na.rm=T),NA_c=sum(is.na(x)),NULL_c=sum(x=0,na.rm=T),N=NROW(na.omit(x)))
}

#***********************************#

dta <- dta_pp  %>%
  select(r_trans, r_leaders, r_candida, r_commit)

#Table descriptive statistics
NumericStatistic <- apply(dta,2,des_stat)
NumericStatistic <- as.data.frame(t(NumericStatistic))
NumericStatistic <- round(NumericStatistic,2)
NumericStatistic <- xtable(NumericStatistic)
print.xtable(NumericStatistic,type="latex")

#Correlation Matrix
CorMat <- cor(as.matrix(dta), method = "pearson")
colnames(CorMat) <- c("Transparency Risk", "Leaders Risk", "Candidate Risk", "Commitment Risk")
rownames(CorMat) <- c("Transparency Risk", "Leaders Risk", "Candidate Risk", "Commitment Risk")


#Correlation plots
ggcorrplot(CorMat, hc.order = T,type = "lower", lab = TRUE,
           outline.col = "white",
           ggtheme = ggplot2::theme_bw,
           show.diag=TRUE,
           colors = c("black", "white", "black"))

#1.3. Test Factor Analysis Raw Data
#EFA
N_FACTORS(CorMat, N = 85, method = "ML", criteria=c("PARALLEL","EKC","KGC","SCREE","SMT"))


#1.4. Normalization
#I'm going to use the min-max normalization because I would like to have some variance. The normalization gives a range between 0 and 1.
dta_pp <- dta_pp %>%
  mutate(nr_trans=((dta_pp$r_trans-0)/5)) %>%
  mutate(nr_leaders=((dta_pp$r_leaders-0)/4)) %>%
  mutate(nr_candida=((dta_pp$r_candida-0)/4)) %>%
  mutate(nr_commit=((dta_pp$r_commit-0)/2))
head(dta_pp,4)
sapply(dta_pp, class)

ndta <- dta_pp %>%
  select(nr_trans,nr_leaders,nr_candida,nr_commit)
head(ndta, 4)

#Table with Descriptive Stats
norm_stats <- apply(ndta,2,des_stat)
norm_stats <- as.data.frame(t(norm_stats))
norm_stats <- round(norm_stats,2)
norm_stats <- xtable(norm_stats)
print.xtable(norm_stats,type="latex")

#1.5. Calculating the Index
#Arithmetic mean with equal weights
dta_pp <- dta_pp %>%
  mutate(roc_pp0= ((nr_trans+nr_leaders+nr_candida+nr_commit)*25))  ##X11,X21,X31
head(dta_pp, 4)
sapply(dta_pp, class)

#Descriptive stats
roc_pp_stat <- des_stat(dta_pp$roc_pp0)
roc_pp_stat <- round(roc_pp_stat,2)
roc_pp_stat

roc_pp_stat <- group_by(dta_pp, country) %>% 
  summarise(
    Average = mean(roc_pp0, na.rm = TRUE),
    SD = sd(roc_pp0, na.rm = TRUE),
    Min = min(roc_pp0, na.rm = TRUE),
    Max = max(roc_pp0, na.rm = TRUE),
    No = n()
  )
roc_pp_stat <- xtable(roc_pp_stat, type="latex")
print.xtable(roc_pp_stat, type="latex")

#Graph results
groc_pp <- ggplot(dta_pp %>% arrange(desc(roc_pp0)) %>%
                    mutate(party_cname=factor(party_cname, levels=party_cname)), 
                  aes(x=party_cname,y=roc_pp0)) +
  geom_bar(stat="identity",width = 0.75, position = position_dodge(0.8)) +
  coord_flip()+
  facet_wrap(~country,scales = "free_y") +
  scale_fill_continuous(low="grey", high="black")+
  labs(x = "", y="ROC Parties",fill="Nationalization Level")+
  theme_bw() +
  theme(text = element_text(size=7), 
        strip.text.x = element_text(size = 8, face = "bold"),
        panel.spacing=unit(0,"pt"))
groc_pp

#1.6. Collapse by country
roc_pp <- collap(dta_pp$roc_pp0, dta_pp$cod_country, fmean, keep.col.order = T)

colnames(roc_pp) <- c("cod_ctr","av_roc_pp")
cod_ctr <- c("NIC","VEN")
av_roc_pp <- c("NA","NA")
ctr_miss <- data.frame(cod_ctr, av_roc_pp)
roc_pp <- rbind(roc_pp, ctr_miss)
roc_pp$av_roc_pp <- as.numeric(roc_pp$av_roc_pp)
head(roc_pp, 18)

############################
###2. ROBUSTNESS ANALYSIS###
############################

#2.1.Decision about factors that affects sensibility

#2.1.1 Normalization. This factor X1 has two values
#Based is normalization, alternative STANDARIZATION
dta_pp <- dta_pp %>%
  mutate(sr_trans= (r_trans - mean(r_trans))/sd(r_trans)) %>%
  mutate(sr_leaders = (r_leaders - mean(r_leaders))/sd(r_leaders)) %>%
  mutate(sr_candida = (r_candida - mean(r_candida))/sd(r_candida)) %>%
  mutate(sr_commit = (r_commit- mean(r_commit))/sd(r_commit))
head(dta_pp,9)
sapply(dta_pp, class)

sdta <- dta_pp %>%
  select(sr_trans,sr_leaders,sr_candida,sr_commit)
head(sdta)

stand_stats <- apply(sdta,2,des_stat)
stand_stats <- as.data.frame(t(stand_stats))
stand_stats <- round(stand_stats,2)
stand_stats
xtable(stand_stats)

#2.1.2 Weighting. This factor X2 has three values
#Instead of using equal weights, I get PCA weights and BOD
##PCA
res.pca <- PCA(ndta, graph = FALSE)

eig.val <- get_eigenvalue(res.pca)
eig.val
fviz_eig(res.pca, addlabels = TRUE, ylim = c(0, 50))

var <- get_pca_var(res.pca)
fviz_contrib(res.pca, choice = "var", axes = 1)

## Getting weights
contr <- matrix(unname(var$contrib[,1]))
contr <- t(contr)
class(contr)

dta_pp <- dta_pp %>%
  add_column(p_trans = contr[1,1]) %>%
  add_column(p_leaders=contr[1,2]) %>%
  add_column(p_candida=contr[1,3]) %>%
  add_column(p_commit=contr[1,4])
head(dta_pp,9)
sapply(dta_pp, class)


#2.1.3 Aggregation. This factor X3 has two values.
#Alternatively, I use geometric aggregation methods. 

dta_pp <- dta_pp %>%
  mutate(roc_pp1= ((sr_trans+sr_leaders+sr_candida+sr_commit)*25))%>%  ##X12,X21,X31
  mutate(roc_pp2= ((nr_trans*p_trans)+(nr_leaders*p_leaders)+(nr_candida*p_candida)+(nr_commit*p_commit))) %>% ##X11,X22,X31
  mutate(roc_pp3= ((sr_trans*p_trans)+(sr_leaders*p_leaders)+(sr_candida*p_candida)+(sr_commit*p_commit))) %>%  ##X12,X22,X31
  mutate(roc_pp4= (((nr_trans^25)*(nr_leaders^25)*(nr_candida^25)*(nr_commit^25))^(1/100))) %>% ##X11,X21,X32
  mutate(roc_pp5= (((sr_trans^25)*(sr_leaders^25)*(sr_candida^25)*(sr_commit^25))^(1/100))) %>% ##X12,X21,X32 Geometric mean just for positive numbers
  mutate(roc_pp6= (((nr_trans^p_trans)*(nr_leaders^p_leaders)*(nr_candida^p_candida)*(nr_commit^p_commit))^(1/100))) %>% ##X11,X22,X32
  mutate(roc_pp7= (((sr_trans^p_trans)*(sr_leaders^p_leaders)*(sr_candida^p_candida)*(sr_commit^p_commit))^(1/100))) ##X12,X22,X32 Geometric mean just for positive numbers
head(dta_pp, 4)
sapply(dta_pp, class)

#2.2. Analyzing uncertanty
#Descriptive Stats
com <- dta_pp %>%
  select(roc_pp0, roc_pp1, roc_pp2,roc_pp3,roc_pp4,roc_pp5,roc_pp6,roc_pp7)

com_stats <- apply(com,2,des_stat)
com_stats <- as.data.frame(t(com_stats))
com_stats <- round(com_stats,2)
com_stats
xtable(com_stats)

t_pp <- dta_pp %>%
  select(country, party_cname, roc_pp0, roc_pp1, roc_pp2,roc_pp3,roc_pp4,roc_pp6)
t_pp[with(t_pp, order(country, -roc_pp0)), ]
head(t_pp, 4)
sapply(t_pp, class)
xtable(t_pp, digits=2)

##Uncertanty Graph
#Rescaling indexes to 0-100

#**********************#
#Function for rescaling#
#**********************#
rescale <- function(x) (x-min(x))/(max(x) - min(x)) * 100
#**********************#

#Creating median, min and max
t_pp <- t_pp %>%
  mutate(rs_roc0= rescale(roc_pp0)) %>%
  mutate(rs_roc1= rescale(roc_pp1)) %>%
  mutate(rs_roc2= rescale(roc_pp2)) %>%
  mutate(rs_roc3= rescale(roc_pp3)) %>%
  mutate(rs_roc4= rescale(roc_pp4)) %>%
  mutate(rs_roc6= rescale(roc_pp6))
head(t_pp, 4)
sapply(t_pp, class)

t_pp <- t_pp %>%
  mutate(roc_min=apply(t_pp[9:14], 1, FUN = min, na.rm = TRUE)) %>%
  mutate(roc_max=apply(t_pp[9:14], 1, FUN = max, na.rm = TRUE)) %>%
  mutate(roc_mean= rowMeans(t_pp[9:14], na.rm = TRUE))


uncer <- ggplot(t_pp %>% arrange(desc(rs_roc0)) %>%
                  mutate(party_cname=factor(party_cname, levels=party_cname))) +
  geom_point(aes(x = party_cname, y = rs_roc0), size = 1.5) +
  geom_point(aes(x = party_cname, y = roc_mean), shape = 4) +
  geom_errorbar(aes(ymin = roc_min, ymax = roc_max, y=roc_mean,x = party_cname)) +
  labs(x = "Political Parties", y="Rescaling ROC Indexes by Parties")+
  coord_flip()+
  theme_bw()+
  theme(text = element_text(size=7))
uncer


##########################################################################################
#****************************************************************************************#
#*******************************COUNTRY LEVEL ANALYSIS***********************************#
#****************************************************************************************#
##########################################################################################

###############################
###3. Calculating the Index ###
###############################
#3.1 Data Input: Countries
dta_ctr <- read.csv("C:/Users/giova/OneDrive/Research_Projects/Risk_Corruption/R_script/roc_countries.csv", header=T,stringsAsFactors=FALSE,sep=";")
sapply(dta_ctr, class)
dta_ctr$r_trans <- as.numeric(dta_ctr$r_trans)
dta_ctr$r_trans[is.na(dta_ctr$r_trans)]<-0
colnames(dta_ctr)[2] <- c("cod_ctr")
sapply(dta_ctr, class)
head(dta_ctr,4)

#3.2 Descriptive statistics
dta2 <- dta_ctr  %>%
  select(r_funding, r_miscon, r_trans, r_interest,r_EMB)

#Table descriptive statistics
NumericStatistic2 <- sapply(dta2,des_stat)
NumericStatistic2 <- as.data.frame(t(NumericStatistic2))
NumericStatistic2 <- round(NumericStatistic2,2)
NumericStatistic2
xtable(NumericStatistic2)


#Correlation Matrix
CorMat2 <- cor(as.matrix(dta2))
colnames(CorMat2) <- c("Funding Risk", "Misconduct Risk", "Transparency Risk", "Conflict Interest Risk","EMB Risk")
rownames(CorMat2) <- c("Funding Risk", "Misconduct Risk", "Transparency Risk", "Conflict Interest Risk","EMB Risk")


#Correlation plots
ggcorrplot(CorMat2, hc.order = T,type = "lower", lab = TRUE,
           outline.col = "white",
           ggtheme = ggplot2::theme_bw,
           show.diag=TRUE,
           colors = c("black", "white", "black"))

#3.3. Normalization
dta_ctr <- dta_ctr %>%
  mutate(nr_funding=((dta_ctr$r_funding-0)/45)) %>%
  mutate(nr_miscon=((dta_ctr$r_miscon-0)/10)) %>%
  mutate(nr_trans=((dta_ctr$r_trans-0)/15)) %>%
  mutate(nr_interest=((dta_ctr$r_interest-0)/5)) %>%
  mutate(nr_EMB=((dta_ctr$r_EMB-0)/5))
sapply(dta_ctr, class)
head(dta_ctr,4)

ndta2 <- dta_ctr %>%
  select(nr_funding,nr_miscon,nr_trans,nr_interest,nr_EMB)
head(ndta2, 4)

#Table descriptive statistics
norm_stats2 <- sapply(ndta2,des_stat)
norm_stats2 <- as.data.frame(t(norm_stats2))
norm_stats2 <- round(norm_stats2,2)
norm_stats2 <- xtable(norm_stats2)
print.xtable(norm_stats2, type="html")



#3.4. Calculating the Index
##Arithmetic mean with equal weights
dta_ctr <- dta_ctr %>%
  mutate(roc_ctr0= ((nr_funding+nr_miscon+nr_trans+nr_interest+nr_EMB)*20))  ## Additive same weights
head(dta_ctr, 4)
sapply(dta_ctr, class)

##Descriptive stats
roc_ctr_stat <- des_stat(dta_ctr$roc_ctr0)
roc_ctr_stat

##Graph results
groc_ctr <- ggplot(dta_ctr %>% arrange(desc(roc_ctr0)) %>%
                     mutate(country=factor(country, levels=country)), 
                   aes(x=country,y=roc_ctr0)) +
  geom_bar(stat="identity",width = 0.75, position = position_dodge(0.8)) +
  #geom_bar(stat="identity",colour="white") + #Another options with lines in bars
  coord_flip()+
  labs(x = "", y="ROC Country")+
  geom_text(aes(label = round(roc_ctr0,2)), nudge_y = -3, colour = "white", size=3)+
  theme_bw() +
  theme(text = element_text(size=10))
groc_ctr


############################
###4. ROBUSTNESS ANALYSIS###
############################

#4.1.Decision about factors that affects sensibility

#4.1.1 Normalization. This factor X1 has two values
##Based is normalization, alternative STANDARIZATION
dta_ctr <- dta_ctr %>%
  mutate(sr_funding=((r_funding- mean(r_funding))/sd(r_funding))) %>%
  mutate(sr_miscon=((r_miscon- mean(r_miscon))/sd(r_miscon))) %>%
  mutate(sr_trans=((r_trans- mean(r_trans))/sd(r_trans))) %>%
  mutate(sr_interest=((r_interest- mean(r_interest))/sd(r_interest))) %>%
  mutate(sr_EMB=((r_EMB- mean(r_EMB))/sd(r_EMB)))
sapply(dta_ctr, class)
head(dta_ctr,4)

sdta2 <- dta_ctr %>%
  select(sr_funding,sr_miscon,sr_trans,sr_interest,sr_EMB)
head(sdta2,4)

stand_stats2 <- apply(sdta2,2,des_stat)
stand_stats2 <- as.data.frame(t(stand_stats2))
stand_stats2 <- round(stand_stats2,2)
stand_stats2
xtable(stand_stats2)


#4.1.2 Weighting. This factor X2 has three values
#Instead of using equal weights, I get PCA weights and BOD
##PCA
res.pca2 <- PCA(ndta2, graph = FALSE)
eig.val2 <- get_eigenvalue(res.pca2)
eig.val2
fviz_eig(res.pca2, addlabels = TRUE, ylim = c(0, 50))

var2 <- get_pca_var(res.pca2)
fviz_contrib(res.pca2, choice = "var", axes = 1)

## Getting weights
contr2 <- matrix(unname(var2$contrib[,1]))
contr2 <- t(contr2)
class(contr2)

dta_ctr <- dta_ctr %>%
  add_column(p_funding = contr2[1,1]) %>%
  add_column(p_miscon=contr2[1,2]) %>%
  add_column(p_trans=contr2[1,3]) %>%
  add_column(p_interest=contr2[1,4]) %>%
  add_column(p_EMB=contr2[1,5])
head(dta_ctr,9)
sapply(dta_ctr, class)

#4.1.3 Aggregation. This factor X3 has two values.
#Alternatively, I use geometric aggregation methods.
dta_ctr <- dta_ctr %>%
  mutate(roc_ctr1= (roc_ctr0= ((sr_funding+sr_miscon+sr_trans+sr_interest+sr_EMB)*20))) %>%  ##X12,X21,X31
  mutate(roc_ctr2= ((nr_funding*p_funding)+(nr_miscon*p_miscon)+(nr_trans*p_trans)+(nr_interest*p_interest)+(nr_EMB*p_EMB))) %>% ##X11,X22,X31
  mutate(roc_ctr3= ((sr_funding*p_funding)+(sr_miscon*p_miscon)+(sr_trans*p_trans)+(sr_interest*p_interest)+(sr_EMB*p_EMB))) %>%  ##X12,X22,X31
  mutate(roc_ctr4= (((nr_funding^20)*(nr_miscon^20)*(nr_trans^20)*(nr_interest^20)*(nr_EMB^20))^(1/100))) %>% ##X11,X21,X32
  mutate(roc_ctr5= (((sr_funding^20)*(sr_miscon^20)*(sr_trans^20)*(sr_interest^20)*(sr_EMB^20))^(1/100))) %>% ##X12,X21,X32 Geometric mean just for positive numbers
  mutate(roc_ctr6= (((nr_funding^p_funding)*(nr_miscon^p_miscon)*(nr_trans^p_trans)*(nr_interest^p_interest)*(nr_EMB^p_EMB))^(1/100))) %>% ##X11,X22,X32
  mutate(roc_ctr7= (((sr_funding^p_funding)*(sr_miscon^p_miscon)*(sr_trans^p_trans)*(sr_interest^p_interest)*(sr_EMB^p_EMB))^(1/100))) ##X12,X22,X32 Geometric mean just for positive numbers
head(dta_ctr, 4)
sapply(dta_ctr, class)

#4.2. Analyzing uncertainty
##Descriptive Stats
com2 <- dta_ctr %>%
  select(roc_ctr0,roc_ctr1, roc_ctr2, roc_ctr3,roc_ctr4,roc_ctr5,roc_ctr6,roc_ctr7)

com_stats2 <- apply(com2,2,des_stat)
com_stats2 <- as.data.frame(t(com_stats2))
com_stats2 <- round(com_stats2,2)
com_stats2
xtable(com_stats2)


t_ctr <- dta_ctr %>%
  select(country,roc_ctr0,roc_ctr1, roc_ctr2, roc_ctr3,roc_ctr4,roc_ctr5,roc_ctr6,roc_ctr7)
t_ctr<- t_ctr[with(t_ctr, order(-roc_ctr0)), ]
head(t_ctr, 18)
sapply(t_ctr, class)
ind_tab <- xtable(t_ctr, digits=2)
print.xtable(ind_tab, type="latex")


##Uncertanty Graph 
#Rescaling indexes to 0-100
t_ctr <- t_ctr %>%
  mutate(rs_roc0= rescale(roc_ctr0)) %>%
  mutate(rs_roc1= rescale(roc_ctr1)) %>%
  mutate(rs_roc2= rescale(roc_ctr2)) %>%
  mutate(rs_roc3= rescale(roc_ctr3)) %>%
  mutate(rs_roc4= rescale(roc_ctr4)) %>%
  mutate(rs_roc5= rescale(roc_ctr5)) %>%
  mutate(rs_roc6= rescale(roc_ctr6))
head(t_ctr, 18)
sapply(t_ctr, class)

#Creating median, min and max
t_ctr <- t_ctr %>%
  mutate(roc_min=apply(t_ctr[10:16], 1, FUN = min, na.rm = TRUE)) %>%
  mutate(roc_max=apply(t_ctr[10:16], 1, FUN = max, na.rm = TRUE)) %>%
  mutate(roc_mean= rowMeans(t_ctr[10:16], na.rm = TRUE))


uncer <- ggplot(t_ctr %>% arrange(desc(rs_roc0)) %>%
                  mutate(country=factor(country, levels=country))) +
  geom_point(aes(x = country, y = rs_roc0), size = 2) +
  geom_point(aes(x = country, y = roc_mean), shape = 4) +
  geom_errorbar(aes(ymin = roc_min, ymax = roc_max, y=roc_mean,x = country)) +
  labs(x = "Countries", y="Rescaling ROC Indexes by Country")+
  coord_flip()+
  theme_bw()+
  theme(text = element_text(size=7))
uncer

##Heat Plot
#Rankings
t_ctr <- t_ctr %>% 
  mutate(rank0 = dense_rank(desc(-roc_ctr0))) %>% 
  mutate(rank1 = dense_rank(desc(-roc_ctr1))) %>% 
  mutate(rank2 = dense_rank(desc(-roc_ctr2))) %>% 
  mutate(rank3 = dense_rank(desc(-roc_ctr3))) %>% 
  mutate(rank4 = dense_rank(desc(-roc_ctr4))) %>% 
  mutate(rank5 = dense_rank(desc(-roc_ctr5))) %>% 
  mutate(rank6 = dense_rank(desc(-roc_ctr6)))
head(t_ctr, 4)
sapply(t_ctr, class)

for(i in 1:18) { 
  t_ctr[paste0("I",i)]<- count_row_if(i,data=t_ctr)
}
head(t_ctr, 4)
sapply(t_ctr, class)

ind_long2 <- t_ctr %>%
  select(country, I1:I18)
head(ind_long2, 4)
sapply(ind_long2, class)

ind_long2<- melt(ind_long2)

ind_long2$variable <- factor(ind_long2$variable, levels = unique(ind_long2$variable), ordered = TRUE)
ind_long2$country <- factor(ind_long2$country, levels = unique(ind_long2$country), ordered = TRUE)

ggplot(ind_long2, aes(x=variable, y=country)) +
  geom_tile(aes(fill=value))+
  scale_fill_gradient(low="white", high="black")+
  labs(x = "Ranking", y="Country")+
  theme_bw()+
  theme(text = element_text(size=7))

